In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
import plotly.express as px
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import precision_score, recall_score 
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import RandomForestClassifier

Loading data¶

In [3]:
df =  pd.read_csv('/Users/naina/desktop/first_project/WA_Fn-UseC_-Telco-Customer-Churn.csv')
df.head()
Out[3]:
customerID gender SeniorCitizen Partner Dependents tenure PhoneService MultipleLines InternetService OnlineSecurity ... DeviceProtection TechSupport StreamingTV StreamingMovies Contract PaperlessBilling PaymentMethod MonthlyCharges TotalCharges Churn
0 7590-VHVEG Female 0 Yes No 1 No No phone service DSL No ... No No No No Month-to-month Yes Electronic check 29.85 29.85 No
1 5575-GNVDE Male 0 No No 34 Yes No DSL Yes ... Yes No No No One year No Mailed check 56.95 1889.5 No
2 3668-QPYBK Male 0 No No 2 Yes No DSL Yes ... No No No No Month-to-month Yes Mailed check 53.85 108.15 Yes
3 7795-CFOCW Male 0 No No 45 No No phone service DSL Yes ... Yes Yes No No One year No Bank transfer (automatic) 42.30 1840.75 No
4 9237-HQITU Female 0 No No 2 Yes No Fiber optic No ... No No No No Month-to-month Yes Electronic check 70.70 151.65 Yes

5 rows × 21 columns

Data basic info¶

Data shape¶

In [4]:
df.shape
Out[4]:
(7043, 21)

Data types¶

In [5]:
df.dtypes
Out[5]:
customerID           object
gender               object
SeniorCitizen         int64
Partner              object
Dependents           object
tenure                int64
PhoneService         object
MultipleLines        object
InternetService      object
OnlineSecurity       object
OnlineBackup         object
DeviceProtection     object
TechSupport          object
StreamingTV          object
StreamingMovies      object
Contract             object
PaperlessBilling     object
PaymentMethod        object
MonthlyCharges      float64
TotalCharges         object
Churn                object
dtype: object

Missing values¶

In [6]:
df.isna().sum()
Out[6]:
customerID          0
gender              0
SeniorCitizen       0
Partner             0
Dependents          0
tenure              0
PhoneService        0
MultipleLines       0
InternetService     0
OnlineSecurity      0
OnlineBackup        0
DeviceProtection    0
TechSupport         0
StreamingTV         0
StreamingMovies     0
Contract            0
PaperlessBilling    0
PaymentMethod       0
MonthlyCharges      0
TotalCharges        0
Churn               0
dtype: int64

Data basic stats¶

In [7]:
df.describe()
Out[7]:
SeniorCitizen tenure MonthlyCharges
count 7043.000000 7043.000000 7043.000000
mean 0.162147 32.371149 64.761692
std 0.368612 24.559481 30.090047
min 0.000000 0.000000 18.250000
25% 0.000000 9.000000 35.500000
50% 0.000000 29.000000 70.350000
75% 0.000000 55.000000 89.850000
max 1.000000 72.000000 118.750000

Data basic Stats and analysis¶

Drop unwanted columns¶

In [8]:
df.drop('customerID',axis='columns',inplace=True) 

Data analysis on each columns¶

Class distribution¶

In [9]:
print(df.Churn.value_counts())
fig = px.pie(df, names=df["Churn"].map({"No":"Non-churn","Yes":"Churn"}), title='Population of Churn and Non-churn group')
fig.update_traces(textinfo='value+percent', textfont_size=18)
fig.update_layout(width=700, height=500)
fig.show()
Churn
No     5174
Yes    1869
Name: count, dtype: int64

gender distribution¶

In [10]:
print(df.gender.value_counts())
fig = px.pie(df, names=df["gender"])
fig.update_traces(textinfo='value+percent', textfont_size=18)
fig.update_layout(width=700, height=500)
fig.show()
gender
Male      3555
Female    3488
Name: count, dtype: int64

gender distribution by class¶

In [11]:
sns.set(style="whitegrid")
plt.figure(figsize=(8,4))
sns.countplot(x='gender', hue='Churn', data=df)
plt.title('gender by Churn')
plt.xlabel('gender')
plt.ylabel('Churn')
plt.show()
/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/seaborn/_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/seaborn/_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/seaborn/_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/seaborn/_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

No description has been provided for this image

SeniorCitizen distribution¶

In [12]:
print(df.SeniorCitizen.value_counts())
fig = px.pie(df, names=df["gender"])
fig.update_traces(textinfo='value+percent', textfont_size=18)
fig.update_layout(width=700, height=500)
fig.show()
SeniorCitizen
0    5901
1    1142
Name: count, dtype: int64

SeniorCitizen distribution by class¶

In [13]:
sns.set(style="whitegrid")
plt.figure(figsize=(8,4))
sns.countplot(x='SeniorCitizen', hue='Churn', data=df)
plt.title('SeniorCitizen by Churn')
plt.xlabel('SeniorCitizen')
plt.ylabel('Churn')
plt.show()
/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/seaborn/_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/seaborn/_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/seaborn/_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/seaborn/_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

No description has been provided for this image

Partner distribution¶

In [14]:
print(df.SeniorCitizen.value_counts())
fig = px.pie(df, names=df["Partner"])
fig.update_traces(textinfo='value+percent', textfont_size=18)
fig.update_layout(width=700, height=500)
fig.show()
SeniorCitizen
0    5901
1    1142
Name: count, dtype: int64

Partner distribution by class¶

In [15]:
sns.set(style="whitegrid")
plt.figure(figsize=(8,4))
sns.countplot(x='Partner', hue='Churn', data=df)
plt.title('Partner by Churn')
plt.xlabel('Partner')
plt.ylabel('Churn')
plt.show()
/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/seaborn/_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/seaborn/_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/seaborn/_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/seaborn/_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

No description has been provided for this image

Dependents distribution¶

In [16]:
print(df.Dependents.value_counts())
fig = px.pie(df, names=df["Dependents"])
fig.update_traces(textinfo='value+percent', textfont_size=18)
fig.update_layout(width=700, height=500)
fig.show()
Dependents
No     4933
Yes    2110
Name: count, dtype: int64

Dependents distribution by class¶

In [17]:
sns.set(style="whitegrid")
plt.figure(figsize=(8,4))
sns.countplot(x='Dependents', hue='Churn', data=df)
plt.title('Dependents by Churn')
plt.xlabel('Dependents')
plt.ylabel('Churn')
plt.show()
/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/seaborn/_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/seaborn/_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/seaborn/_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/seaborn/_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

No description has been provided for this image

Contract distribution¶

In [18]:
print(df.Dependents.value_counts())
fig = px.pie(df, names=df["Contract"])
fig.update_traces(textinfo='value+percent', textfont_size=18)
fig.update_layout(width=700, height=500)
fig.show()
Dependents
No     4933
Yes    2110
Name: count, dtype: int64

Contract distribution by class¶

In [19]:
sns.set(style="whitegrid")
plt.figure(figsize=(8,4))
sns.countplot(x='Contract', hue='Churn', data=df)
plt.title('Contract by Churn')
plt.xlabel('Contract')
plt.ylabel('Churn')
plt.show()
/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/seaborn/_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/seaborn/_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/seaborn/_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/seaborn/_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

No description has been provided for this image

Converting TotalCharges to numerical¶

In [20]:
df =  df[df["TotalCharges"] != ' ']
df["TotalCharges"] =  df["TotalCharges"].apply(float)

tenure analysis for churn class¶

In [21]:
tenure_churn_no = df[df.Churn=='No'].tenure
tenure_churn_yes = df[df.Churn=='Yes'].tenure
plt.xlabel("tenure")
plt.ylabel("Number Of Customers")
plt.title("Customer churn prediction ")
plt.hist([tenure_churn_yes, tenure_churn_no], rwidth=0.9, color=['blue','red'],label=['Churn=Yes','Churn=No'])
plt.legend()
Out[21]:
<matplotlib.legend.Legend at 0x175f9ce90>
No description has been provided for this image

monthly charges analysis¶

In [22]:
monthly_charges_no = df[df.Churn=='No'].MonthlyCharges      
monthly_charges_yes = df[df.Churn=='Yes'].MonthlyCharges      
plt.xlabel("Monthly Charges")
plt.ylabel("Number Of Customers")
plt.title("Customer Churn Prediction Visualiztion")
plt.hist([monthly_charges_yes, monthly_charges_no], rwidth=0.8, color=['blue','red'],label=['Churn=Yes','Churn=No'])
plt.legend()
Out[22]:
<matplotlib.legend.Legend at 0x176550290>
No description has been provided for this image

Contract vs PaymentMethod analysis¶

In [23]:
cross_tab = pd.crosstab(df['Contract'], df['PaymentMethod'])
sns.heatmap(cross_tab, annot=True, fmt='d', cmap='coolwarm')
plt.title('Heatmap of Contract vs. PaymentMethod')
plt.show()
No description has been provided for this image

contact vs InternetService¶

In [24]:
 sns.set(style="whitegrid")
plt.figure(figsize=(8,4))
sns.countplot(x='InternetService', hue='Contract', data=df)
plt.title('Contract by internet service Type')
plt.xlabel('Internet service Type')
plt.ylabel('Contract Tpe')
plt.show()
/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/seaborn/_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/seaborn/_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/seaborn/_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/seaborn/_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

No description has been provided for this image

PaymentMethod vs InternetService¶

In [78]:
sns.set(style="whitegrid")
plt.figure(figsize=(10,6))
sns.countplot(x='PaymentMethod', hue='InternetService', data=df)
plt.title('Internet service by payment method')
plt.xlabel('Payment Method')
plt.ylabel('Internet Service')
plt.show()
/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/seaborn/_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/seaborn/_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/seaborn/_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/seaborn/_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

No description has been provided for this image

function for printing unique values in each columns¶

In [26]:
def print_unique_col_values(df):
    for column in df:
        if df[column].dtypes=='object':
            print(f'{column}: {df[column].unique()}')

unique values in each columns¶

In [27]:
print_unique_col_values(df)
gender: ['Female' 'Male']
Partner: ['Yes' 'No']
Dependents: ['No' 'Yes']
PhoneService: ['No' 'Yes']
MultipleLines: ['No phone service' 'No' 'Yes']
InternetService: ['DSL' 'Fiber optic' 'No']
OnlineSecurity: ['No' 'Yes' 'No internet service']
OnlineBackup: ['Yes' 'No' 'No internet service']
DeviceProtection: ['No' 'Yes' 'No internet service']
TechSupport: ['No' 'Yes' 'No internet service']
StreamingTV: ['No' 'Yes' 'No internet service']
StreamingMovies: ['No' 'Yes' 'No internet service']
Contract: ['Month-to-month' 'One year' 'Two year']
PaperlessBilling: ['Yes' 'No']
PaymentMethod: ['Electronic check' 'Mailed check' 'Bank transfer (automatic)'
 'Credit card (automatic)']
Churn: ['No' 'Yes']

Replace No internet service and No phone service with no¶

In [28]:
df.replace('No internet service','No',inplace=True)
df.replace('No phone service','No',inplace=True)
In [29]:
print_unique_col_values(df)
gender: ['Female' 'Male']
Partner: ['Yes' 'No']
Dependents: ['No' 'Yes']
PhoneService: ['No' 'Yes']
MultipleLines: ['No' 'Yes']
InternetService: ['DSL' 'Fiber optic' 'No']
OnlineSecurity: ['No' 'Yes']
OnlineBackup: ['Yes' 'No']
DeviceProtection: ['No' 'Yes']
TechSupport: ['No' 'Yes']
StreamingTV: ['No' 'Yes']
StreamingMovies: ['No' 'Yes']
Contract: ['Month-to-month' 'One year' 'Two year']
PaperlessBilling: ['Yes' 'No']
PaymentMethod: ['Electronic check' 'Mailed check' 'Bank transfer (automatic)'
 'Credit card (automatic)']
Churn: ['No' 'Yes']

Converting yes no columns with 0 and 1¶

In [30]:
yes_no_columns = ['Partner','Dependents','PhoneService','MultipleLines','OnlineSecurity','OnlineBackup',
                  'DeviceProtection','TechSupport','StreamingTV','StreamingMovies','PaperlessBilling','Churn']
for col in yes_no_columns:
    df[col].replace({'Yes': 1,'No': 0},inplace=True)
In [31]:
df.head()
Out[31]:
gender SeniorCitizen Partner Dependents tenure PhoneService MultipleLines InternetService OnlineSecurity OnlineBackup DeviceProtection TechSupport StreamingTV StreamingMovies Contract PaperlessBilling PaymentMethod MonthlyCharges TotalCharges Churn
0 Female 0 1 0 1 0 0 DSL 0 1 0 0 0 0 Month-to-month 1 Electronic check 29.85 29.85 0
1 Male 0 0 0 34 1 0 DSL 1 0 1 0 0 0 One year 0 Mailed check 56.95 1889.50 0
2 Male 0 0 0 2 1 0 DSL 1 1 0 0 0 0 Month-to-month 1 Mailed check 53.85 108.15 1
3 Male 0 0 0 45 0 0 DSL 1 0 1 1 0 0 One year 0 Bank transfer (automatic) 42.30 1840.75 0
4 Female 0 0 0 2 1 0 Fiber optic 0 0 0 0 0 0 Month-to-month 1 Electronic check 70.70 151.65 1

converting male female in 0 and 1¶

In [32]:
df['gender'].replace({'Female':1,'Male':0},inplace=True)

One hot encoding for categorica columns¶

In [33]:
df_dummies = pd.get_dummies(data=df, columns=['InternetService','Contract','PaymentMethod'])
df_dummies.columns
Out[33]:
Index(['gender', 'SeniorCitizen', 'Partner', 'Dependents', 'tenure',
       'PhoneService', 'MultipleLines', 'OnlineSecurity', 'OnlineBackup',
       'DeviceProtection', 'TechSupport', 'StreamingTV', 'StreamingMovies',
       'PaperlessBilling', 'MonthlyCharges', 'TotalCharges', 'Churn',
       'InternetService_DSL', 'InternetService_Fiber optic',
       'InternetService_No', 'Contract_Month-to-month', 'Contract_One year',
       'Contract_Two year', 'PaymentMethod_Bank transfer (automatic)',
       'PaymentMethod_Credit card (automatic)',
       'PaymentMethod_Electronic check', 'PaymentMethod_Mailed check'],
      dtype='object')

Data normalization for numerical columns¶

In [35]:
cols_to_scale = ['tenure','MonthlyCharges','TotalCharges']
scaler = MinMaxScaler()
df_dummies[cols_to_scale] = scaler.fit_transform(df_dummies[cols_to_scale])

Train Test split¶

In [36]:
X = df_dummies.drop('Churn',axis='columns')
y = df_dummies["Churn"]
In [37]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=43, stratify=y)

class distribution in each class¶

In [38]:
y_train.value_counts()
Out[38]:
Churn
0    4130
1    1495
Name: count, dtype: int64

Logistic regression model Training¶

In [39]:
lr = LogisticRegression()
In [40]:
lr.fit(X_train,y_train)
Out[40]:
LogisticRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
LogisticRegression()

prediction on test data¶

In [41]:
y_pred_lr = lr.predict(X_test)

Calculate score¶

In [42]:
acc_lr = lr.score(X_test, y_pred_lr)
print("Accuracy_lr", acc_lr)
Accuracy_lr 1.0

Calculate precision and recall¶

In [43]:
precision_lr = precision_score(y_test, y_pred_lr)
recall_lr = recall_score(y_test, y_pred_lr)
print("Precision_lr:", precision_lr)
print("Recall_lr:", recall_lr)
Precision_lr: 0.6574074074074074
Recall_lr: 0.56951871657754

Fine Tune logistic regression model¶

In [44]:
log_reg = LogisticRegression()
In [45]:
param_grid = {'C': [0.001, 0.01, 0.1, 1, 10, 100],
              'penalty': ['l1', 'l2'],
              'solver': ['liblinear', 'saga'],
              'class_weight': ['balanced', None]}
grid_search_lr = GridSearchCV(log_reg, param_grid, cv=5, scoring='accuracy')
grid_search_lr.fit(X_train, y_train)
best_params_lr = grid_search_lr.best_params_
/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

In [46]:
best_log_reg = LogisticRegression(**best_params_lr)
best_log_reg.fit(X_train, y_train)
/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

Out[46]:
LogisticRegression(C=10, penalty='l1', solver='saga')
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
LogisticRegression(C=10, penalty='l1', solver='saga')

Fine tuned logistic regression model evaluation¶

In [47]:
y_pred_lr_best = best_log_reg.predict(X_test)

Calculate score¶

In [48]:
acc_lr_best = best_log_reg.score(X_test, y_test)
print("Accuracy_lr_best", acc_lr_best)
Accuracy_lr_best 0.8052594171997157

Calculate precision and recall¶

In [49]:
precision_lr_best = precision_score(y_test, y_pred_lr_best)
recall_lr_best = recall_score(y_test, y_pred_lr_best)
print("Precision_lr_best:", precision_lr_best)
print("Recall_lr_best:", recall_lr_best)
Precision_lr_best: 0.6524390243902439
Recall_lr_best: 0.5721925133689839

Train Random forest model¶

In [50]:
rf_classifier = RandomForestClassifier()
rf_classifier.fit(X_train, y_train)
Out[50]:
RandomForestClassifier()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
RandomForestClassifier()

prediction on test data¶

In [51]:
y_pred_rf = rf_classifier.predict(X_test)

Calculate score¶

In [52]:
acc_rf = rf_classifier.score(X_test, y_test)
print("Accuracy_fr", acc_rf)
Accuracy_fr 0.7910447761194029

Calculate precision and recall¶

In [53]:
precision_rf = precision_score(y_test, y_pred_rf)
recall_rf = recall_score(y_test, y_pred_rf)
print("Precision_rf:", precision_rf)
print("Recall_rf:", recall_rf)
Precision_rf: 0.6282051282051282
Recall_rf: 0.5240641711229946

Fine Tune the random forest model¶

In [54]:
rf_classifier1 = RandomForestClassifier()
param_grid_rf = {'n_estimators': [100, 200, 300],
              'max_depth': [None, 10, 20],
              'min_samples_split': [2, 5, 10]}
grid_search_rf = GridSearchCV(rf_classifier1, param_grid_rf, cv=5, scoring='accuracy')
grid_search_rf.fit(X_train, y_train)
best_params_rf = grid_search_rf.best_params_
best_rf_classifier = RandomForestClassifier(**best_params_rf)
best_rf_classifier.fit(X_train, y_train)
Out[54]:
RandomForestClassifier(max_depth=10, min_samples_split=10, n_estimators=300)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
RandomForestClassifier(max_depth=10, min_samples_split=10, n_estimators=300)

prediction on test data¶

In [55]:
y_pred_rf_best = best_rf_classifier.predict(X_test)

Calculate score¶

In [56]:
acc_rf_best = best_rf_classifier.score(X_test, y_test)
print("Accuracy_rf_best", acc_rf_best)
Accuracy_rf_best 0.8031272210376688

Calculate precision and recall¶

In [57]:
precision_rf_best = precision_score(y_test, y_pred_rf_best)
recall_rf_best = recall_score(y_test, y_pred_rf_best)
print("Precision_rf_best:", precision_rf_best)
print("Recall_rf_best:", recall_rf_best)
Precision_rf_best: 0.6611295681063123
Recall_rf_best: 0.5320855614973262

Balancing the data with smote¶

In [58]:
smote = SMOTE(sampling_strategy='minority')
X_msote, y_smote = smote.fit_resample(X, y)
In [59]:
y_smote.value_counts()
Out[59]:
Churn
0    5163
1    5163
Name: count, dtype: int64

Train test split on balanced data¶

In [60]:
X_train_smote, X_test_smote, y_train_smote, y_test_smote = train_test_split(X_msote, y_smote, test_size=0.2, random_state=43, stratify=y_smote)

Model training on balanced data¶

In [61]:
lr_model_smote = LogisticRegression()
lr_model_smote.fit(X_train_smote, y_train_smote)
/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_logistic.py:460: ConvergenceWarning:

lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression

Out[61]:
LogisticRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
LogisticRegression()

Model evaluation on smote data¶

In [62]:
y_pred_lr_smote = lr_model_smote.predict(X_test)
In [63]:
acc_lr_smote = lr_model_smote.score(X_test, y_test)
print("Accuracy_lr_smote", acc_lr_smote)
Accuracy_lr_smote 0.7626154939587776

Calculate precision and recall¶

In [64]:
precision_lr_smote = precision_score(y_test, y_pred_lr_smote)
recall_lr_smote = recall_score(y_test, y_pred_lr_smote)
print("Precision_lr_smote:", precision_lr_smote)
print("Recall_lr_smote:", recall_lr_smote)
Precision_lr_smote: 0.5367647058823529
Recall_lr_smote: 0.7807486631016043

Fine tune logistic regression model on balanced data¶

In [65]:
log_reg_smote = LogisticRegression()

param_grid_lr_smote = {'C': [0.001, 0.01, 0.1, 1, 10, 100],
              'penalty': ['l1', 'l2'],
              'solver': ['liblinear', 'saga'],
              'class_weight': ['balanced', None]}
grid_search_lr_smote = GridSearchCV(log_reg_smote, param_grid_lr_smote, cv=5, scoring='accuracy')
grid_search_lr_smote.fit(X_train_smote, y_train_smote)
best_params_lr_smote = grid_search_lr_smote.best_params_

best_log_reg_smote = LogisticRegression(**best_params_lr_smote)
best_log_reg_smote.fit(X_train_smote, y_train_smote)
/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/svm/_base.py:1242: ConvergenceWarning:

Liblinear failed to converge, increase the number of iterations.

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/svm/_base.py:1242: ConvergenceWarning:

Liblinear failed to converge, increase the number of iterations.

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/svm/_base.py:1242: ConvergenceWarning:

Liblinear failed to converge, increase the number of iterations.

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/svm/_base.py:1242: ConvergenceWarning:

Liblinear failed to converge, increase the number of iterations.

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/svm/_base.py:1242: ConvergenceWarning:

Liblinear failed to converge, increase the number of iterations.

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/svm/_base.py:1242: ConvergenceWarning:

Liblinear failed to converge, increase the number of iterations.

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/svm/_base.py:1242: ConvergenceWarning:

Liblinear failed to converge, increase the number of iterations.

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/svm/_base.py:1242: ConvergenceWarning:

Liblinear failed to converge, increase the number of iterations.

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/svm/_base.py:1242: ConvergenceWarning:

Liblinear failed to converge, increase the number of iterations.

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/svm/_base.py:1242: ConvergenceWarning:

Liblinear failed to converge, increase the number of iterations.

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning:

The max_iter was reached which means the coef_ did not converge

/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/svm/_base.py:1242: ConvergenceWarning:

Liblinear failed to converge, increase the number of iterations.

Out[65]:
LogisticRegression(C=100, class_weight='balanced', penalty='l1',
                   solver='liblinear')
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
LogisticRegression(C=100, class_weight='balanced', penalty='l1',
                   solver='liblinear')

Fine tuned logistic regression model evaluation¶

In [66]:
y_pred_lr_best_smote = best_log_reg_smote.predict(X_test)

Calculate score¶

In [67]:
acc_lr_best_smote = best_log_reg_smote.score(X_test, y_test)
print("Accuracy_lr_best_smote", acc_lr_best_smote)
Accuracy_lr_best_smote 0.7825159914712153

Calculate precision and recall¶

In [68]:
precision_lr_best_smote = precision_score(y_test, y_pred_lr_best_smote)
recall_lr_best_smote = recall_score(y_test, y_pred_lr_best_smote)
print("Precision_lr_best_smote:", precision_lr_best_smote)
print("Recall_lr_best_smote:", recall_lr_best_smote)
Precision_lr_best_smote: 0.567193675889328
Recall_lr_best_smote: 0.767379679144385

Train Random forest model on balanced data¶

In [69]:
rf_classifier_smote = RandomForestClassifier()
rf_classifier_smote.fit(X_train_smote, y_train_smote)
Out[69]:
RandomForestClassifier()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
RandomForestClassifier()

prediction on test data¶

In [70]:
y_pred_rf_smote = rf_classifier_smote.predict(X_test)

Calculate score¶

In [71]:
acc_rf_smote= rf_classifier_smote.score(X_test, y_test)
print("Accuracy_rf_smote", acc_rf_smote)
Accuracy_rf_smote 0.837953091684435

Calculate precision and recall¶

In [72]:
precision_rf_smote = precision_score(y_test, y_pred_rf_smote)
recall_rf_smote = recall_score(y_test, y_pred_rf_smote)
print("Precision_rf_smote:", precision_rf_smote)
print("Recall_rf_smote:", recall_rf_smote)
Precision_rf_smote: 0.631768953068592
Recall_rf_smote: 0.9358288770053476

Fine Tune the random forest model on balanced data¶

In [73]:
rf_classifier2 = RandomForestClassifier()
param_grid_rf_smote = {'n_estimators': [100, 200, 300],
              'max_depth': [None, 10, 20],
              'min_samples_split': [2, 5, 10]}
grid_search_rf_smote = GridSearchCV(rf_classifier2, param_grid_rf_smote, cv=5, scoring='accuracy')
grid_search_rf_smote.fit(X_train_smote, y_train_smote)
best_params_rf_smote =grid_search_rf_smote.best_params_
best_rf_classifier_smote = RandomForestClassifier(**best_params_rf_smote)
best_rf_classifier_smote.fit(X_train_smote, y_train_smote)
Out[73]:
RandomForestClassifier(max_depth=20, n_estimators=200)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
RandomForestClassifier(max_depth=20, n_estimators=200)

prediction on test data¶

In [74]:
y_pred_rf_best_smote= best_rf_classifier_smote.predict(X_test)

Calculate score¶

In [75]:
acc_rf_best_smote = best_rf_classifier_smote.score(X_test_smote, y_test_smote)
print("Accuracy_rf_best_smote", acc_rf_best_smote)
Accuracy_rf_best_smote 0.8451113262342691

Calculate precision and recall¶

In [76]:
precision_rf_best_smote = precision_score(y_test, y_pred_rf_best_smote)
recall_rf_best_smote = recall_score(y_test, y_pred_rf_best_smote)
print("Precision_rf_best_smote:", precision_rf_best_smote)
print("Recall_rf_best_smote:", recall_rf_best_smote)
Precision_rf_best_smote: 0.6345454545454545
Recall_rf_best_smote: 0.9331550802139037
In [ ]: